package cn.texous.demo.dsj;

import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;

/**
 *
 */
public class SimpleApp {
    public static void main(String[] args) {

        // Should be some file on your system
        String logFile = "C:/program_files/java/spark/README.md";
        SparkSession spark = SparkSession.builder().appName("Simple Application").getOrCreate();
        Dataset<String> logData = spark.read().textFile(logFile).cache();
        logData.filter(new FilterFunction<String>() {
            public boolean call(String s) throws Exception {
                return s.contains("a");
            }
        });

        long numAs = logData.filter(new FilterFunction<String>() {
            public boolean call(String s) throws Exception {
                return s.contains("a");
            }
        }).count();

        long numBs = logData.filter(new FilterFunction<String>() {
            public boolean call(String s) throws Exception {
                return s.contains("b");
            }
        }).count();

        System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);

        spark.stop();
    }
}
